## This file cleans Dave Leip's Turnout data, downloaded from: https://doi.org/10.7910/DVN/WRSW25 ##
## Created by Meredith Dost and last run 8/16/2025 ##

# load in packages
library(tidyverse)
library(readxl)

# set working directory
#setwd("")

# read in Leip turnout data, year by year, and prep for analysis
###### start with 2010, Version 1.0, Date 7/2/2016 ######
# Builds `vote10`: one row per county with columns fips, turnout, year (2010).
leip <- read_excel("Turnout_Data_2010.xlsx", sheet = "County VTO")
# remove empty rows (rows with no value in the first column)
leip <- leip[complete.cases(leip[, 1]), ]
# rename the first two columns, then keep only the columns we need
colnames(leip)[c(1, 2)] <- c("county", "state")
leip <- leip[c("county", "state", "FIPS", "Turnout")]
colnames(leip)[3:4] <- c("fips", "turnout10")
# removing state totals / irrelevant state anomalies
leip <- subset(leip, state != "T")
leip <- subset(
  leip,
  !(county %in% c("Statewide", "Federal/Limited", "Overseas"))
)
# filling in missing OK turnout data for counties that had "zero" turnout
# (based on p21-22 https://oklahoma.gov/content/dam/ok/en/elections/election-results/2010-election-results/2010-general-election-results/ge-results-county-20101102.pdf)
ok_turnout <- c(
  Carter = 12709, Cleveland = 71970, Comanche = 21336, Cotton = 1779,
  Garvin = 8710, Grady = 15526, Jefferson = 1747, Love = 2592,
  Marshall = 3933, McClain = 10889, Murray = 3905, Pontotoc = 9510,
  Stephens = 14398, Tillman = 2032
)
for (cty in names(ok_turnout)) {
  leip$turnout10[leip$state == "OK" & leip$county == cty] <- ok_turnout[[cty]]
}
# filling in missing LA turnout data for parishes that had "zero" turnout
# (based on https://www.sos.la.gov/ElectionsAndVoting/Pages/PostElectionStatisticsParish.aspx)
la_turnout <- c(
  "Acadia" = 17683, "Calcasieu" = 52451, "Cameron" = 2259,
  "Jefferson Davis" = 8601, "Lafayette" = 65411, "St. Landry" = 28562,
  "Vermilion" = 15706
)
for (cty in names(la_turnout)) {
  leip$turnout10[leip$state == "LA" & leip$county == cty] <- la_turnout[[cty]]
}
## finalizing 2010 turnout data by county ##
# keep only the FIPS code and the turnout count, selected by name
vote10 <- leip[, c("fips", "turnout10")]
vote10$year <- 2010
names(vote10)[2] <- "turnout"
# adding DC: copy the first row as a template so the column layout matches,
# then overwrite its values (fips 11001, turnout 135846, year 2010)
dc_row <- vote10[1, ]
dc_row[1, ] <- list(11001, 135846, 2010)
vote10 <- rbind.data.frame(vote10, dc_row)
# clean up: remove only the temporaries created in this section, rather than
# everything in the workspace except vote10
rm(leip, ok_turnout, la_turnout, cty, dc_row)

###### 2012, Version 1.1, Date 10/20/2016 ######
# Builds `vote12`: one row per county with columns fips, turnout, year (2012).
leip <- read_excel("Turnout_Data_2012.xlsx", sheet = "County VTO")
# remove empty rows (rows with no value in the first column)
leip <- leip[complete.cases(leip[, 1]), ]
# rename the first two columns, then keep only the columns we need
colnames(leip)[c(1, 2)] <- c("county", "state")
# NOTE(review): this year reads the "Total Vote" column, whereas the other
# years in this file read "Turnout" -- presumably the 2012 workbook labels
# the column differently; confirm against the spreadsheet.
leip <- leip[c("county", "state", "FIPS", "Total Vote")]
colnames(leip)[c(3, 4)] <- c("fips", "turnout12")
# removing state totals / irrelevant state anomalies
leip <- subset(leip, state != "T")
leip <- subset(leip, !(county %in% c("Statewide", "Federal", "Overseas")))
## finalizing 2012 turnout data by county ##
# keep only the FIPS code and the turnout count, selected by name
vote12 <- leip[, c("fips", "turnout12")]
vote12$year <- 2012
names(vote12)[2] <- "turnout"
# clean up: remove only the temporary created in this section, rather than
# everything in the workspace except the vote tables
rm(leip)

###### 2014, Version 1.0, Date 7/1/2016 ######
# Builds `vote14`: one row per county with columns fips, turnout, year (2014).
leip <- read_excel("Turnout_Data_2014.xlsx", sheet = "County VTO")
# remove empty rows (rows with no value in the first column)
leip <- leip[complete.cases(leip[, 1]), ]
# rename the first two columns, then keep only the columns we need
colnames(leip)[c(1, 2)] <- c("county", "state")
leip <- leip[c("county", "state", "FIPS", "Turnout")]
colnames(leip)[3:4] <- c("fips", "turnout14")
# filling in missing FL turnout data for Pasco county w/"zero" turnout, based on
# https://results.elections.myflorida.com/TurnoutRpt.asp?ElectionDate=11/4/2014&DATAMODE=
leip$turnout14[leip$state == "FL" & leip$county == "Pasco"] <- 162736
# filling in missing OK turnout data for counties that had "zero" turnout
# (based on U.S. Senator https://results.okelections.gov/OKER/?elecDate=20141104)
ok_turnout <- c(Tulsa = 131143, Wagoner = 16686, Washington = 13208)
for (cty in names(ok_turnout)) {
  leip$turnout14[leip$state == "OK" & leip$county == cty] <- ok_turnout[[cty]]
}
# filling in missing TX turnout data for Delta county w/"zero" turnout, based on
# https://elections.sos.state.tx.us/elchist175_county60.htm
leip$turnout14[leip$state == "TX" & leip$county == "Delta"] <- 1331
# removing state totals / irrelevant state anomalies
leip <- subset(leip, state != "T")
leip <- subset(leip, !(county %in% c("Statewide", "Federal", "Overseas")))
## finalizing 2014 turnout data by county ##
# keep only the FIPS code and the turnout count, selected by name
vote14 <- leip[, c("fips", "turnout14")]
vote14$year <- 2014
names(vote14)[2] <- "turnout"
# adding DC: copy the first row as a template so the column layout matches,
# then overwrite its values (fips 11001, turnout 177377, year 2014)
dc_row <- vote14[1, ]
dc_row[1, ] <- list(11001, 177377, 2014)
vote14 <- rbind.data.frame(vote14, dc_row)
# clean up: remove only the temporaries created in this section, rather than
# everything in the workspace except the vote tables
rm(leip, ok_turnout, cty, dc_row)

###### 2016, Version 0.9, Date 4/2/2017 ######
# Builds `vote16`: one row per county with columns fips, turnout, year (2016).
leip <- read_excel("Turnout_Data_2016.xlsx", sheet = "County VTO")
# remove empty rows (rows with no value in the first column)
leip <- leip[complete.cases(leip[, 1]), ]
# rename the first two columns, then keep only the columns we need
colnames(leip)[c(1, 2)] <- c("county", "state")
leip <- leip[c("county", "state", "FIPS", "Turnout")]
colnames(leip)[3:4] <- c("fips", "turnout16")
# removing state totals / irrelevant state anomalies
leip <- subset(leip, state != "T")
leip <- subset(leip, !(county %in% c("Statewide", "Federal", "Overseas")))
## finalizing 2016 turnout data by county ##
# keep only the FIPS code and the turnout count, selected by name
vote16 <- leip[, c("fips", "turnout16")]
vote16$year <- 2016
names(vote16)[2] <- "turnout"
# clean up: remove only the temporary created in this section, rather than
# everything in the workspace except the vote tables
rm(leip)

###### 2018, Version 0.9, Date 4/9/2019 ######
# Builds `vote18`: one row per county with columns fips, turnout, year (2018).
leip <- read_excel("Turnout_Data_2018.xlsx", sheet = "County VTO")
# remove empty rows (rows with no value in the first column)
leip <- leip[complete.cases(leip[, 1]), ]
# rename the first two columns, then keep only the columns we need
colnames(leip)[c(1, 2)] <- c("county", "state")
leip <- leip[c("county", "state", "FIPS", "Turnout")]
colnames(leip)[3:4] <- c("fips", "turnout18")
# removing state totals / irrelevant state anomalies
leip <- subset(leip, state != "T")
leip <- subset(leip, !(county %in% c("Statewide", "Federal", "Overseas")))
## finalizing 2018 turnout data by county ##
# keep only the FIPS code and the turnout count, selected by name
vote18 <- leip[, c("fips", "turnout18")]
vote18$year <- 2018
names(vote18)[2] <- "turnout"
# adding DC: copy the first row as a template so the column layout matches,
# then overwrite its values (fips 11001, turnout 231700, year 2018)
dc_row <- vote18[1, ]
dc_row[1, ] <- list(11001, 231700, 2018)
vote18 <- rbind.data.frame(vote18, dc_row)
# clean up: remove only the temporaries created in this section, rather than
# everything in the workspace except the vote tables
rm(leip, dc_row)

###### 2020, Version 0.90, Date 2/21/21 ######
# Builds `vote20`: columns fips, turnout, year (2020).
leip <- read_excel("Turnout_Data_2020.xlsx", sheet = "County VTO")
# keep only rows that have a value in the first column
has_first_col <- complete.cases(leip[, 1])
leip <- leip[has_first_col, ]
rm(has_first_col)
# name the first two columns, then pull out the four columns of interest
names(leip)[1:2] <- c("county", "state")
vars <- c("county", "state", "FIPS", "Turnout")
leip <- leip[vars]
names(leip)[3:4] <- c("fips", "turnout20")
# drop state-total rows (state code "T"; rows with a missing state are
# dropped too) and keep only the fips and turnout columns
# NOTE(review): unlike the other years, no "Statewide"/"Federal"/"Overseas"
# county rows are filtered here -- confirm the 2020 sheet has none.
leip <- leip[!is.na(leip$state) & leip$state != "T", 3:4]
## finalizing 2020 turnout data by county ##
vote20 <- leip
vote20[["year"]] <- 2020
names(vote20)[2] <- "turnout"

### merge ###
# Stack the six per-year tables (each with columns fips, turnout, year) into
# one long table, in chronological order.
leip <- rbind.data.frame(vote10, vote12, vote14, vote16, vote18, vote20)

#setwd("/voting_data/")
# Write the combined table as a comma-separated file without row names.
# TRUE/FALSE are reserved words; the alias F is an ordinary variable that can
# be reassigned, so spell out FALSE.
write.table(leip, "leip_turnout_county.csv", sep = ",", row.names = FALSE)

